#!/usr/bin/env ruby

LKP_SRC ||= ENV['LKP_SRC'] || File.dirname(__dir__)

require "#{LKP_SRC}/lib/cci"
require "#{LKP_SRC}/lib/hash"
require "#{LKP_SRC}/lib/stats"
require 'json'
require 'time'

# Fields printed by default for --show_type array output.
DEFAULT_FIELDS = %w[suite id job_stage job_health testbox osv start_time end_time].freeze
# Base ordering clause appended to every ES query ("limit N" is added later).
QUERY_CONDITION = 'order by submit_time desc'.freeze

# Fields counted key=value style (kvcount) instead of being numerically
# merged when jobs are grouped.
KEYWORD = %w[
  suite
  os
  arch
  category
  job_state
  job_health
  job_stage
  tbox_group
  upstream_repo
  summary.success
  summary.any_fail
  summary.any_error
  summary.any_stderr
  summary.any_warning
].freeze

# Validate a "<number rule>,<string rule>" option pair supplied by the user.
# Prints an error and exits(1) on malformed input; returns the two rules as
# an array of strings on success.
def validate_merge_options(opts, param_name)
  # Split the user-supplied pair; anything other than exactly two parts is malformed.
  rules = opts.split(',')
  unless rules.length == 2
    puts "wrong: The #{param_name} parameter must be like the form '[Number rule,String rule]'"
    exit 1
  end

  number_rule, string_rule = rules

  # Allowed reducers: the number rule applies to numeric fields, the string
  # rule to everything else.
  number_choices = %w[avg min max stddev sum count first last]
  string_choices = %w[count first last]

  valid = number_choices.include?(number_rule) && string_choices.include?(string_rule)
  unless valid
    puts "wrong: The number rule must be one of #{number_choices.join('|')},the string rule must be one of #{string_choices.join('|')}"
    exit 1
  end

  rules
end

# Query jobs from ES, then filter / sort / group / merge / summarize them
# for the various --show_type outputs.
class Jobs
  # NOTE(review): the group_jobs reader generated here is shadowed by the
  # instance method `group_jobs(group_fields)` defined below.
  attr_reader :es_response, :jobs, :group_jobs
  attr_accessor :field_specs, :group_fields, :output_fields, :query_fields, :where_list
  attr_accessor :is_errid
  attr_accessor :last_time
  attr_accessor :show_type
  attr_accessor :size

  def initialize
    @has_kvcount = false
  end

  # Run the assembled query against ES and normalize the response into @jobs.
  def do_query
    @es_response = es_opendistro_query(@query_hash)
    format_es_to_jobs
  end

  # es_response{} => jobs[]
  # Pull each hit's '_source' document out of the raw ES response.
  def format_es_to_jobs
    results = @es_response['hits']['hits'].map { |hit| hit['_source'] }
    die('query results is empty') if results.empty?
    @jobs = results
  end

  # Print according to --show_type <es|json|array|group>.
  def format_data_print
    case @show_type
    when 'es'
      puts JSON.pretty_generate(@es_response)
    when 'json'
      puts JSON.pretty_generate(@jobs)
    when 'array'
      format_print(@jobs, @output_fields)
    when 'group'
      puts JSON.pretty_generate(@merge_jobs)
    else
      puts 'Wrong show_type'
    end
  end

  # Keep only jobs whose start_time falls within the last `last_time` days.
  def filter_last_time(last_time)
    last_time = Integer(last_time)
    @jobs.delete_if do |job|
      next false unless job.include? 'start_time' # jobs in 'submit' stage has no start_time

      start_time = Time.parse(job['start_time'])
      diff = (Time.now - start_time).to_i
      diff.abs > last_time * 24 * 60 * 60
    end
  end

  # Sort @jobs by `field`; ascending when is_asc is truthy, descending otherwise.
  # Fix: descending order used `job[field].reverse`, which only exists on
  # strings and reverses characters -- it never produced a descending order.
  def sortjobs(field, is_asc)
    @jobs.sort_by! { |job| job[field] }
    @jobs.reverse! unless is_asc
  end

  # Sort @merge_jobs: if `field` is one of group_fields sort between groups,
  # otherwise sort the jobs inside each group.
  def sortgroup_jobs(group_fields, field, is_asc)
    if group_fields.include? field
      # NOTE(review): between-group sort assumes each value is an array of
      # jobs ([key, [job, ...]]); after merge_jobs the values are merged
      # hashes instead -- confirm callers sort before merging.
      @merge_jobs = if is_asc
                      @merge_jobs.sort { |a, b| a[1][0][field] <=> b[1][0][field] }
                    else
                      @merge_jobs.sort { |a, b| b[1][0][field] <=> a[1][0][field] }
                    end
    else
      @merge_jobs.each do |group_job|
        # Same descending fix as sortjobs: sort ascending, then reverse.
        group_job.last.sort_by! { |job| job[field] }
        group_job.last.reverse! unless is_asc
      end
    end
  end

  # Dispatch sorting to the grouped or flat representation.
  def sortbyfield(field, is_asc)
    if @show_type == 'group'
      sortgroup_jobs(@group_fields, field, is_asc)
    else
      sortjobs(field, is_asc)
    end
  end

  # Count each errid across @jobs and print it with its last result_root.
  #
  # errids layout:
  # errids{
  #   errid=>{
  #     count
  #     result_root
  #   }
  #   ...
  # }
  def count_show_errid
    errids = {}

    # Collect error info from every job into errids.
    @jobs.each do |job|
      next unless job.key?('errid')

      job['errid'].each do |errid|
        errids[errid] ||= {}
        # Fix: start the counter at 0 -- the old `||= 1` followed by `+= 1`
        # double-counted the first occurrence of every errid.
        errids[errid]['count'] ||= 0
        errids[errid]['count'] += 1
        errids[errid]['result_root'] = job['result_root']
      end
    end

    # Most frequent errids first.
    errids = errids.sort_by { |_k, v| -v['count'] }

    # Output sample:
    #   8       last_state.exit_fail
    #   /srv/result/iozone/2023-01-31/vm-2p8g/centos-7.6.1810-aarch64/1-xfs-kyber-64k-4g-writereadrand_rw/crystal.6654547
    #
    #   5   stderr.Dload_Upload_Total_Spent_Left_Speed
    #   /srv/result/borrow/2023-01-31/vm-2p8g/openeuler-20.03-SP1-aarch64/10/crystal.6654492
    errids.each do |errid, value|
      printf "%d    %s\n%s\n\n", value['count'], errid, value['result_root']
    end
  end

  # Deduplicate @jobs: group them by `dims`, then merge each group into one
  # synthetic job according to `options` (see calculate).
  def merge_by(dims, options = nil)
    group_jobs(dims)
    merge_jobs(options)
    @jobs = @merge_jobs.values
  end

  # Group @jobs by the given fields.
  # @group_jobs layout: { group_field => [job, ...] }
  def group_jobs(group_fields)
    grouped = {}

    @jobs.each do |job|
      # Build a unique composite key from the grouping fields' values.
      key = group_fields.collect { |field| job[field] }.join('-')
      grouped[key] ||= []
      grouped[key].push(job)
    end
    @group_jobs = grouped
  end

  # Merge every group in @group_jobs into a single job per group.
  def merge_jobs(options = nil)
    merged = {}
    @group_jobs.each do |group_field, jobs|
      # data_map: { field => [value, ...] } collected across the group.
      data_map = {}
      jobs.each do |job|
        job.each do |field, value|
          data_map[field] ||= []
          data_map[field].push(value)
        end
      end

      # Build the merged job out of data_map.
      new_job = {}
      data_map.each do |field, datas|
        if KEYWORD.include?(field) || field.start_with?('stats.element.') || field.start_with?('result.')
          kvcount_field(new_job, field, datas)
        else
          new_job[field] = calculate(datas, options)
        end
        # Grouping fields keep their first (identical within the group) value.
        new_job[field] = datas.first if @group_fields.include? field
      end

      merged[group_field] = new_job
    end
    @merge_jobs = merged
  end

  # Count key=value occurrences into `job` as kvcount.* fields; when the job
  # already carries an 'id', record it under the matching raw.id.* list.
  def kvcount_field(job, key, vals)
    vals.each do |val|
      job["kvcount.#{key}=#{val}"] ||= 0
      job["kvcount.#{key}=#{val}"] += 1
      next unless job.include? 'id'

      job["raw.id.[#{key}=#{val}]"] ||= []
      job["raw.id.[#{key}=#{val}]"] << job['id']
    end
  end

  # Reduce a group's values for one field to a single value.
  # options is [number_rule, string_rule]: the first applies when the data is
  # numeric, the second otherwise. Defaults to ['avg', 'last'].
  # Returns nil for an unknown rule.
  def calculate(datas, options)
    options = %w[avg last] if options.nil?
    option = datas[0].is_a?(Integer) ? options[0] : options[1]

    case option
    when 'avg'
      # Truncating integer average, matching the original behavior.
      datas.sum(&:to_i) / datas.length
    when 'min'
      datas.map(&:to_i).min
    when 'max'
      datas.map(&:to_i).max
    when 'sum'
      datas.sum(&:to_i)
    when 'count'
      datas.length
    when 'first'
      datas.first
    when 'last'
      datas.last
    when 'stddev'
      # Fix: the old code computed the squared deviations but discarded them,
      # then summed the raw data again. Compute a real population stddev.
      avg = datas.sum(&:to_i).to_f / datas.length
      variance = datas.sum { |data| (data.to_i - avg)**2 } / datas.length
      variance**0.5
    end
  end

  # Read programs/<suite>/meta.yaml and return the metric names flagged kpi: 1.
  def get_suite_kpi(suite)
    kpi_array = []
    # Fix: YAML.load(File.open(...)) leaked the file handle; load_file closes it.
    program = YAML.load_file("#{LKP_SRC}/programs/#{suite}/meta.yaml")
    unless program['results'].nil?
      program['results'].each do |key, value|
        kpi_array << key if value['kpi'] == 1
      end
    end
    kpi_array
  end

  # Drop every non-KPI entry from each job's stats.
  def filter_stats_kpi
    @jobs.each do |job|
      suite = job['suite']
      stats = job['stats']
      # Fix: skip this job rather than abort the whole loop when suite is
      # missing (was `return`, inconsistent with the stats check below).
      next if suite.nil?
      next if stats.nil?

      kpi_array = get_suite_kpi(suite)
      job['stats'].delete_if { |key, _value| !kpi_array.include?(key.sub(/^[^.]+\./, '')) }
    end
  end

  # Recursively flatten a nested hash into new_hash using dotted keys:
  # {'a' => {'b' => 1}} becomes {'a.b' => 1}. Empty sub-hashes are dropped.
  def flat_hash(old_hash, new_hash)
    old_hash.each do |key1, value1|
      if value1.is_a?(Hash)
        next if value1.empty?

        temp_hash = {}
        value1.each do |key2, value2|
          temp_hash.merge!({ "#{key1}.#{key2}" => value2 })
        end
        flat_hash(temp_hash, new_hash)
      else
        new_hash.merge!({ key1 => value1 })
      end
    end
  end

  # Replace every job in @jobs with its flattened form, in place.
  def flat_jobs
    @jobs.each do |job|
      new_job = {}
      flat_hash(job, new_job)
      new_job['stats'] = job['stats'] if job.include? 'stats' # keep for easy access all stats
      job.replace(new_job)
    end
  end

  # Functions ported from the es_jobs class in the original es-jobs.rb

  # Classify one job's stats into summary.* flags; a job whose stats match
  # none of the stderr/warning/error/fail patterns is marked summary.success.
  def add_job_summary(stats, job)
    summary_result = ''
    stats.each_key do |stat|
      # "stderr.linux-perf": 1,
      # "stderr.error:target_not_found:ruby-dev": 1,
      # "stderr.error:could_not_open_file/var/lib/pacman/local/ldb-#:#-#/desc:Not_a_directory": 1,
      if stat.match(/stderr\./i)
        job['summary.any_stderr'] = 1
        summary_result = 'stderr'
        next
      end

      # sum.stats.pkgbuild.mb_cache.c:warning:‘read_cache’defined-but-not-used[-Wunused-function]: 1
      # sum.stats.pkgbuild.mb_cache.c:warning:control-reaches-end-of-non-void-function[-Wreturn-type]: 1
      if stat.match(/:warning:|\.warning$/i)
        job['summary.any_warning'] = 1
        summary_result = 'warning'
      end

      # "last_state.test.iperf.exit_code.127": 1,
      # "last_state.test.cci-makepkg.exit_code.1": 1,
      # sum.stats.pkgbuild.cc1plus:error:unrecognized-command-line-option‘-Wno-unknown-warning-option’[-Werror]: 2
      if stat.match(/:error:|\.error$|\.exit_code\./i)
        job['summary.any_error'] = 1
        summary_result = 'error'
      end

      if stat.match(/\.fail$/i)
        job['summary.any_fail'] = 1
        summary_result = 'fail'
      end
    end
    return unless summary_result.empty?

    job['summary.success'] = 1
  end

  # set jobs summary fields information in place
  def add_jobs_summary
    @jobs.each do |job|
      stats = job['stats']
      next unless stats

      add_job_summary(stats, job)
    end
  end

  # Union of all stat keys seen across the given jobs.
  def get_all_metrics(jobs)
    metrics = []
    jobs.each do |job|
      stats = job['stats']
      next unless stats

      metrics.concat(stats.keys)
    end
    metrics.uniq
  end

  # Initialize the result accumulator with one empty series per metric.
  def initialize_result_hash(metrics)
    result = {
      'kvcount' => {},
      'raw.id' => {},
      'sum.stats' => {},
      'raw.stats' => {},
      'avg.stats' => {},
      'max.stats' => {},
      'min.stats' => {}
    }
    metrics.each { |metric| result['raw.stats'][metric] = [] }
    result
  end

  # Append each of this job's stat values to its raw.stats series.
  def set_default_value(result, stats, metrics)
    left_metrics = metrics - stats.keys
    # NOTE(review): this line only reads the array and changes nothing --
    # possibly meant to pad missing metrics (e.g. `<< nil`); confirm intent
    # before changing, since `.element` series are iterated without compact.
    left_metrics.each { |metric| result['raw.stats'][metric] }

    stats.each do |key, value|
      result['raw.stats'][key] << value
    end
  end

  # Count KEYWORD field values for one job and record its id per key=value.
  def kvcount(result, job)
    KEYWORD.each do |keyword|
      next unless job[keyword]

      result['kvcount']["#{keyword}=#{job[keyword]}"] ||= 0
      result['kvcount']["#{keyword}=#{job[keyword]}"] += 1
      result['raw.id']["[#{keyword}=#{job[keyword]}]"] ||= []
      result['raw.id']["[#{keyword}=#{job[keyword]}]"] << job['id']
    end
  end

  # For '.element' series, tally each element into sum.stats; for numeric
  # series, record avg/max/min (ignoring nil placeholders).
  def assemble_element(key, value, result)
    if key.end_with?('.element')
      value.each do |one_value_array|
        one_value_array.each do |element|
          result['sum.stats']["#{key}: #{element}"] ||= 0
          result['sum.stats']["#{key}: #{element}"] += 1
        end
      end
    else
      result['avg.stats'][key] = value.compact.sum / value.compact.size.to_f
      result['max.stats'][key] = value.compact.max
      result['min.stats'][key] = value.compact.min
    end
    result
  end

  # Roll the raw.stats series up into sum/avg/max/min aggregates.
  def stats_count(result)
    result['raw.stats'].each do |key, value|
      next if key.end_with?('.message')

      if function_stat?(key)
        result['sum.stats'][key] = value.compact.size
      else
        result = assemble_element(key, value, result)
      end
    end
  end

  # Aggregate state statistics over all given jobs.
  def query_jobs_state(jobs)
    metrics = get_all_metrics(jobs)
    result = initialize_result_hash(metrics)
    jobs.each do |job|
      kvcount(result, job)
      stats = job['stats']
      next unless stats

      set_default_value(result, stats, metrics)
    end

    stats_count(result)
    result
  end

  # Print a nested result hash as flat "dotted.key: json_value" lines.
  def output_yaml(prefix, result)
    result.each do |key, value|
      prefix_key = if prefix.empty?
                     key.to_s
                   else
                     "#{prefix}.#{key}"
                   end

      if value.is_a? Hash
        output_yaml(prefix_key, value)
      else
        puts "#{prefix_key}: #{value.to_json}"
      end
    end
  end

  # Output entry point for the kvcount summary.
  def output_kvcount
    output_yaml('', @jobs_kvcount) if @jobs_kvcount
  end

  # Entry point that builds the kvcount statistics from @jobs.
  def generate_kvcount
    if @jobs.empty?
      puts 'No query result is found'
      return
    end
    @jobs_kvcount = query_jobs_state(@jobs)
    @jobs_kvcount['kvcount'] = @jobs_kvcount['kvcount'].sort.to_h
    @jobs_kvcount['raw.id'] = @jobs_kvcount['raw.id'].sort.to_h
    @jobs_kvcount
  end

  # Default the show_type: '*' output fields imply json, otherwise array.
  def set_show_type(show_type)
    @show_type = show_type || (
      if @output_fields.first == '*'
        'json'
      else
        'array'
      end
    )
  end

  def set_output_fields
    @output_fields = merge_fields(DEFAULT_FIELDS, @field_specs)
  end

  # Derive the ES query fields from output/group/time options.
  def set_query_fields
    @query_fields = if @is_errid
                      %w[errid result_root start_time]
                    else
                      transform_query_fields(output_fields)
                    end

    @query_fields.concat(@group_fields) if @group_fields
    @query_fields.push('start_time') if @last_time

    @query_fields = ['*'] if query_fields[0] == '*'
  end

  # Assemble the final query hash sent to ES.
  def set_query_hash
    @query_hash = read_cci_credentials
    @query_hash['query_index'] = 'jobs'
    @query_hash['query_field'] = @query_fields.uniq.join(',')
    @query_hash['query_where'] = @where_list.join(' AND ') unless @where_list.empty?
    @query_hash['query_condition'] = QUERY_CONDITION + " limit #{@size}"
  end

  # Map kvcount.*/raw.* output specs back to the underlying query fields,
  # noting (@has_kvcount) that a kvcount pass is needed.
  def transform_query_fields(output_fields)
    more_fields = []
    fields = output_fields.map do |f|
      case f
      when /^kvcount\.(.*)=/
        @has_kvcount = true
        $1
      when /^raw\.([^.]+)\.\[(.*)=/
        @has_kvcount = true
        more_fields << $1
        $2
      else
        f
      end
    end
    fields.concat more_fields
  end

end
